## Prepare manifesto model

#reticulate:: use_python("SET_PYTHON_ENVIRONMENT")
library(reticulate)
# Load the Hugging Face `transformers` Python module through reticulate.
transformers <- import("transformers")
autotoken <- transformers$AutoTokenizer

######### classification
# Tokenizer of the base model used by the manifesto classifier.
# NOTE(review): `padding`/`truncation` are normally applied when the tokenizer
# is called rather than when it is loaded -- confirm they take effect here.
pol_tokens <- autotoken$from_pretrained(
  "xlm-roberta-large",
  padding = TRUE, truncation = TRUE,
  clean_up_tokenization_spaces = FALSE
)

# Text-classification pipeline; called on text chunks in manifest_positions().
# The compute device defaults to "mps" and can be overridden without editing
# this file by setting the MANIFESTO_DEVICE environment variable
# (e.g. "cpu" or "cuda:0") before sourcing it.
# `trust_remote_code = TRUE` allows code shipped with the model repository to
# run locally.
policy_score <- transformers$pipeline(
  model = "manifesto-project/manifestoberta-xlm-roberta-56policy-topics-sentence-2024-1-1",
  task = "text-classification",
  tokenizer = pol_tokens,
  device = Sys.getenv("MANIFESTO_DEVICE", unset = "mps"),
  trust_remote_code = TRUE
)

  

# classification Bakker & Hobolt 2013
#
# Sets of category labels ("<code> - <name>"). manifest_positions() compares
# predicted labels against its `left`/`right` arguments by exact string match,
# so a label only counts if it is spelled exactly like the classifier output.
# NOTE(review): verify wording and capitalisation of every entry against the
# label set of the model loaded in `policy_score` (e.g. "506 - Education
# expansion", "608 - Multiculturalism: negative", "415 - Marxist analysis:
# positive", "405 - Corporatism/ Mixed Economy").
right_emp <- c(
  "104 - Military: Positive",
  "201 - Freedom and Human Rights",
  "203 - Constitutionalism: Positive",
  "305 - Political Authority",
  "401 - Free Market Economy",
  "402 - Incentives",
  "407 - Protectionism: Negative",
  "414 - Economic Orthodoxy",
  "505 - Welfare State Limitation",
  "601 - National Way of Life: Positive",
  "603 - Traditional Morality: Positive",
  "605 - Law and Order: Positive",
  "606 - Civic Mindedness: Positive"
)

left_emp <- c(
  "103 - Anti-Imperialism",
  "105 - Military: Negative",
  "106 - Peace",
  "107 - Internationalism: Positive",
  "202 - Democracy",
  "403 - Market Regulation",
  "404 - Economic Planning",
  "406 - Protectionism: Positive",
  "412 - Controlled Economy",
  "413 - Nationalisation",
  # 504 is welfare state *expansion*; "Limitation" is category 505 (right side)
  "504 - Welfare State Expansion",
  "506 - Education expansion",
  "701 - Labour Groups: Positive"
)

right_econ <- c(
  "401 - Free Market Economy",
  "402 - Incentives",
  "407 - Protectionism: Negative",
  "505 - Welfare State Limitation",
  "507 - Education Limitation",
  "410 - Economic Growth: Positive",
  "702 - Labour Groups: Negative"
)

left_econ <- c(
  "403 - Market Regulation",
  "404 - Economic Planning",
  "406 - Protectionism: Positive",
  # 504 is welfare state *expansion*; "Limitation" is category 505 (right side)
  "504 - Welfare State Expansion",
  "506 - Education expansion",
  "413 - Nationalisation",
  "412 - Controlled Economy",
  "701 - Labour Groups: Positive",
  "405 - Corporatism/ Mixed Economy",
  "409 - Keynesian Demand Management",
  "415 - Marxist analysis: positive",
  "503 - Equality: Positive"
)

right_aut <- c(
  "305 - Political Authority",
  "601 - National Way of Life: Positive",
  "603 - Traditional Morality: Positive",
  "605 - Law and Order: Positive",
  "608 - Multiculturalism: negative",
  "606 - Civic Mindedness: Positive"
)

left_lib <- c(
  "501 - Environmental Protection: Positive",
  # was misspelled "Natural Way of Life"
  "602 - National Way of Life: Negative",
  "604 - Traditional Morality: Negative",
  "502 - Culture: Positive",
  "607 - Multiculturalism: Positive",
  "416 - Anti-Growth Economy: Positive",
  "705 - Underprivileged Minority Groups",
  "201 - Freedom and Human Rights",
  "202 - Democracy"
)




# Log share of ideologically coded units.
#
# `lr` is a vector of "Left" / "Right" / other codes. The result is
# log(#Left + #Right + 1) - log(length(lr)); the +1 keeps the first term
# finite when no unit is coded "Left" or "Right".
importance <- function(lr) {
  is_left <- lr == "Left"
  is_right <- lr == "Right"
  ideological_count <- sum(is_right) + sum(is_left)
  log(ideological_count + 1) - log(length(lr))
}


# Classify legislative text chunk by chunk and summarise the resulting
# Left / Right / Neutral codes per document.
#
# Arguments:
#   legis  Character vector of document texts (a single string also works).
#   left   Character vector of category labels coded as "Left".
#   right  Character vector of category labels coded as "Right". Checked
#          first, so a label contained in both sets is coded "Right".
#
# Returns the value of `position()` for every document that still has text
# chunks after preprocessing, named by the document's index in `legis`.
# Returns NULL (with a warning) when no document yields any chunk.
#
# Uses the global `policy_score` classification pipeline.
# NOTE(review): `position()` is not defined in the visible part of this file;
# confirm it is in scope (or whether `importance()` was meant).
manifest_positions <- function(legis, left, right) {
  # Turn one document into the character vector of chunks to classify.
  chunk_document <- function(doc) {
    if (is.na(doc)) {
      return(character(0))
    }
    # Keep only the text before the closing formula, if there is one.
    if (stringr::str_detect(doc, "Done at Brussels")) {
      doc <- stringr::str_split_i(doc, "Done at Brussels", i = 1)
    } else if (stringr::str_detect(doc, "Done at Luxembourg")) {
      doc <- stringr::str_split_i(doc, "Done at Luxembourg", i = 1)
    }
    chunks <- doc |>
      tokenizers::tokenize_sentences(lowercase = TRUE) |>
      unlist() |>
      # split implausibly long sentences into chunks
      tokenizers::chunk_text(chunk_size = 100) |>
      unlist()
    # The first seven chunks are discarded.
    # NOTE(review): presumably title/preamble text -- confirm the cut-off.
    as.character(unname(chunks[-seq_len(7)]))
  }

  # One row per chunk, tagged with the index of the document it came from.
  temptext <- lapply(seq_along(legis), function(i) {
    chunks <- chunk_document(legis[[i]])
    data.frame(id = rep(i, length(chunks)), text = chunks)
  })
  out <- dplyr::bind_rows(temptext)

  if (nrow(out) == 0L) {
    warning("No text chunks left to classify.", call. = FALSE)
    return(NULL)
  }

  # as.list() makes reticulate pass a Python list even for a single chunk.
  scored <- policy_score(as.list(out$text))
  labs <- vapply(scored, function(res) res$label, character(1))
  lr <- ifelse(
    labs %in% right, "Right",
    ifelse(labs %in% left, "Left", "Neutral")
  )

  # sapply() is kept because the return type of position() is not visible here.
  sapply(split(lr, out$id), position)
}
